#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("limma")


# Switch to the project folder: the input files (GeneExp.txt, OS.txt) are read
# from here and every output file of this script is written here, because all
# file names below are relative.
# NOTE(review): hard-coded, machine-specific Windows path -- adjust it (or launch
# the script from the data folder) before running on another machine.
setwd("E:\\metabolism\\LGG\\17.Model_gene\\3.Survival\\CGGA")   
# Load the required packages
library(limma)
library(survival)
library(survminer)

# Input file names, resolved relative to the current working directory
expFile <- "GeneExp.txt"  # expression data file
cliFile <- "OS.txt"       # survival data file

# Read the expression table and turn it into a numeric matrix `data` with
# samples in rows and genes in columns.
# The first column of the file supplies the gene identifiers; every remaining
# column is converted to numeric expression values.
exprTable <- read.table(expFile, header = TRUE, sep = "\t", check.names = FALSE)
exprGeneIds <- as.character(exprTable[[1]])
exprValues <- exprTable[, -1, drop = FALSE]

# Convert column by column instead of calling as.matrix() on the mixed
# (character + numeric) data frame: as.matrix() would format() the numeric
# columns to 7 significant digits before they are parsed back to numbers.
toNumeric <- function(column) {
  if (is.numeric(column)) {
    as.numeric(column)
  } else {
    as.numeric(as.character(column))
  }
}
data <- matrix(
  unlist(lapply(exprValues, toNumeric), use.names = FALSE),
  nrow = nrow(exprValues),
  dimnames = list(exprGeneIds, colnames(exprValues))
)

# Average the rows that share the same gene identifier (limma::avereps)
data <- avereps(data)

# Keep genes whose mean expression is positive.
# which() discards genes whose mean is NA instead of turning them into all-NA
# rows, and drop = FALSE keeps the matrix shape when a single gene remains.
data <- data[which(rowMeans(data) > 0), , drop = FALSE]

# Transpose: rows become samples, columns become genes
data <- t(data)

# Load the survival table; its first column supplies the row names
survivalTable <- read.table(
  file = cliFile,
  sep = "\t",
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)
# Rescale the follow-up time by 365 (days -> years, matching the
# "Time(years)" axis label used for the survival plots)
survivalTable$futime <- survivalTable$futime / 365
cli <- survivalTable

# Merge expression and survival data on the samples present in both tables.
# The result `rt` holds the survival columns first, followed by one column
# per gene.
sameSample <- intersect(row.names(data), row.names(cli))
if (length(sameSample) == 0) {
  # Fail early with a clear message instead of an obscure model-fitting error
  stop("No sample identifiers are shared between the expression and survival tables",
       call. = FALSE)
}
# drop = FALSE keeps the matrix / data-frame shape (and therefore the gene
# column name) even when only one gene or one clinical column is present
data <- data[sameSample, , drop = FALSE]
cli <- cli[sameSample, , drop = FALSE]
rt <- cbind(cli, data)

# Cycle through the genes to find those associated with prognosis.
# For every gene column of `rt`:
#   1. fit a univariate Cox model on the expression values;
#   2. split the samples into high/low groups at the cutpoint chosen by
#      surv_cutpoint() and compare the groups with a log-rank test;
#   3. write a Kaplan-Meier plot to "sur.<gene>.OS.pdf" when the log-rank
#      p-value is below 0.05.
# Results: `outTab` (Cox statistics, one row per gene) and the vectors
# `km`, `kmHR`, `kmup95`, `kmlow95` (log-rank statistics, same gene order).

# Gene columns are everything after the first two columns of rt
# (assumes those two are futime and fustat -- TODO confirm against OS.txt).
# Indexing the names directly also works when there is exactly one gene column.
geneNames <- colnames(rt)[-(1:2)]
geneCount <- length(geneNames)

# Preallocate the result vectors instead of growing them inside the loop
coxHR <- numeric(geneCount)
coxLow95 <- numeric(geneCount)
coxHigh95 <- numeric(geneCount)
coxPvalue <- numeric(geneCount)
km <- numeric(geneCount)
kmHR <- numeric(geneCount)
kmup95 <- numeric(geneCount)
kmlow95 <- numeric(geneCount)

for (geneIdx in seq_along(geneNames)) {
  gene <- geneNames[geneIdx]

  # Cox analysis
  cox <- coxph(Surv(futime, fustat) ~ rt[, gene], data = rt)
  coxSummary <- summary(cox)
  coxHR[geneIdx] <- coxSummary$conf.int[, "exp(coef)"]
  coxLow95[geneIdx] <- coxSummary$conf.int[, "lower .95"]
  coxHigh95[geneIdx] <- coxSummary$conf.int[, "upper .95"]
  coxPvalue[geneIdx] <- coxSummary$coefficients[, "Pr(>|z|)"]

  # KM analysis on a three-column copy with a fixed column name for the gene
  kmData <- rt[, c("futime", "fustat", gene)]
  colnames(kmData) <- c("futime", "fustat", "gene")
  # Get the best cutoff and label every sample as high or low
  res.cut <- surv_cutpoint(kmData, time = "futime", event = "fustat",
                           variables = c("gene"))
  res.cat <- surv_categorize(res.cut)
  fit <- survfit(Surv(futime, fustat) ~ gene, data = res.cat)

  # Compare high and low expression survival differences (log-rank test).
  # lower.tail = FALSE avoids the cancellation in 1 - pchisq(), which rounds
  # very small p-values to exactly 0.
  logrank <- survdiff(Surv(futime, fustat) ~ gene, data = res.cat)
  pValue <- pchisq(logrank$chisq, df = 1, lower.tail = FALSE)

  # Hazard ratio of group 1 versus group 2 from observed/expected events, with
  # an approximate 95% confidence interval on the log scale.
  # NOTE(review): group 1 is presumably "high" (consistent with the legend
  # labels below) -- confirm the group order.
  HR <- (logrank$obs[1] / logrank$exp[1]) / (logrank$obs[2] / logrank$exp[2])
  logHrSe <- sqrt(1 / logrank$exp[1] + 1 / logrank$exp[2])
  up95 <- exp(log(HR) + qnorm(0.975) * logHrSe)
  low95 <- exp(log(HR) - qnorm(0.975) * logHrSe)

  km[geneIdx] <- pValue
  kmHR[geneIdx] <- HR
  kmup95[geneIdx] <- up95
  kmlow95[geneIdx] <- low95

  # Plot survival curves for genes with pvalue < 0.05
  if (pValue < 0.05) {
    if (pValue < 0.001) {
      pLabel <- "p<0.001"
    } else {
      pLabel <- paste0("p=", sprintf("%.03f", pValue))
    }

    # Plot a survival curve
    surPlot <- ggsurvplot(fit,
                          data = res.cat,
                          pval = pLabel,
                          pval.size = 6,
                          legend.title = gene,
                          legend.labs = c("high", "low"),
                          xlab = "Time(years)",
                          ylab = "Overall survival",
                          palette = c("#AB3A2C", "#4A71C0"),
                          break.time.by = 1,
                          conf.int = FALSE,
                          risk.table = FALSE,
                          risk.table.title = "",
                          risk.table.height = .25)
    pdf(file = paste0("sur.", gene, ".OS.pdf"), onefile = FALSE,
        width = 5,
        height = 4.5)
    # Always close the PDF device, even when drawing the plot fails
    tryCatch(print(surPlot), finally = dev.off())
  }
}

# Assemble the Cox results once, keeping the statistics numeric
outTab <- data.frame(id = geneNames,
                     HR = coxHR,
                     HR.95L = coxLow95,
                     HR.95H = coxHigh95,
                     pvalue = coxPvalue,
                     stringsAsFactors = FALSE)

# Append the log-rank columns to the Cox results and save the combined
# single-factor table as a tab-separated file
outTab <- cbind(outTab, km = km, kmHR = kmHR, kmup95 = kmup95, kmlow95 = kmlow95)
write.table(
  x = outTab,
  file = "uniCox.OS.txt",
  quote = FALSE,
  sep = "\t",
  row.names = FALSE
)
